1. Load packages

library(tictoc)
library(formattable)
library(data.table)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(GGally)
library(plotly) 
library(gridExtra)
library(corrplot)
library(caret)
library(ggthemes)
library(RColorBrewer)
library(fmsb)
library(rpart.plot)
library(ROCR)
library(mlr3learners.gbm)
library(ranger)
library(mlr3)
library(mlr3learners)
library(mlr3measures)
library(mlr3pipelines)
library(mlr3tuning)
library(mlr3filters)
library(mlbench)
library(mlr3misc)
library(caret)
library(mlr3viz)
library(corrplot)
library(mlr3verse)

2. Read Datasets

Original datasets from Kaggle. pokemon800.csv: information about each Pokemon. combats.csv: first Pokemon ID, second Pokemon ID, winner ID.

# Load the raw Kaggle data: one row per Pokemon, one row per recorded battle.
pokemon <- read.csv("pokemon800.csv", header = TRUE, stringsAsFactors = TRUE)
combats <- read.csv("combats.csv", header = TRUE, stringsAsFactors = TRUE)
# Give the Pokemon table concise snake_case column names.
names(pokemon) <- c("id", "name", "type_1", "type_2", "hp", "attack", "defense",
                    "sp_attack", "sp_defense", "speed", "generation", "is_legendary")
# Character ids so later id lookups against the battle table are unambiguous.
pokemon$id <- as.character(pokemon$id)

3. Descriptive Statistics: EDA Pokemon

Visit ShinyApp:

Visit My Shiny App for Pokemon EDA

Create New Dataset for Prediction

In order to predict winner Pokemon, we need to create new Pokemon dataset using Pokemon information, as dataset “combats” includes only the information of Pokemon IDs.

daten <- combats

# Helper: look up a stat column of the pokemon table by battle id.
# match() is vectorized, so one call per column replaces the original
# per-row sapply() loops (O(n*m) element-wise lookups -> one vectorized pass).
stat_of <- function(ids, column) pokemon[[column]][match(ids, pokemon$id)]

# Resolve names for readability of the battle table.
daten$First_pokemon_name  <- stat_of(daten$First_pokemon,  "name")
daten$Second_pokemon_name <- stat_of(daten$Second_pokemon, "name")
daten$Winner_name         <- stat_of(daten$Winner,         "name")
# Target variable: did the first listed Pokemon win the battle?
daten$First_wins <- daten$First_pokemon == daten$Winner

# Attach the raw stats of both combatants.
daten$First_pokemon_attack   <- stat_of(daten$First_pokemon,  "attack")
daten$Second_pokemon_attack  <- stat_of(daten$Second_pokemon, "attack")
daten$First_pokemon_hp       <- stat_of(daten$First_pokemon,  "hp")
daten$Second_pokemon_hp      <- stat_of(daten$Second_pokemon, "hp")
daten$First_pokemon_defense  <- stat_of(daten$First_pokemon,  "defense")
daten$Second_pokemon_defense <- stat_of(daten$Second_pokemon, "defense")
daten$First_pokemon_sp_atk   <- stat_of(daten$First_pokemon,  "sp_attack")
daten$Second_pokemon_sp_atk  <- stat_of(daten$Second_pokemon, "sp_attack")
daten$First_pokemon_sp_def   <- stat_of(daten$First_pokemon,  "sp_defense")
daten$Second_pokemon_sp_def  <- stat_of(daten$Second_pokemon, "sp_defense")
daten$First_pokemon_speed    <- stat_of(daten$First_pokemon,  "speed")
daten$Second_pokemon_speed   <- stat_of(daten$Second_pokemon, "speed")

# Model features: stat differences (first minus second).
daten$attackVSattack_diff   <- daten$First_pokemon_attack  - daten$Second_pokemon_attack
daten$defenseVSdefense_diff <- daten$First_pokemon_defense - daten$Second_pokemon_defense
daten$sp_atkVSsp_atk_diff   <- daten$First_pokemon_sp_atk  - daten$Second_pokemon_sp_atk
daten$sp_defVSsp_def_diff   <- daten$First_pokemon_sp_def  - daten$Second_pokemon_sp_def
daten$speedVSspeed_diff     <- daten$First_pokemon_speed   - daten$Second_pokemon_speed
daten$HPVSHP_diff           <- daten$First_pokemon_hp      - daten$Second_pokemon_hp
# sign() gives -1 / 0 / +1: first Pokemon slower / equal / faster.
daten$First_pokemon_faster <- sign(daten$speedVSspeed_diff)
# Legendary status as logical (as.logical on a factor uses its level labels).
daten$First_pokemon_legendary  <- as.logical(stat_of(daten$First_pokemon,  "is_legendary"))
daten$Second_pokemon_legendary <- as.logical(stat_of(daten$Second_pokemon, "is_legendary"))
# Higher speed is definitely an advantage: the faster Pokemon acts first.
# If both fighters have equal speed, the starter is chosen at random,
# so a zero speed difference should not favor either side.
# Yet the data shows NO first-Pokemon wins whenever speeds are equal:
table(daten$First_pokemon_faster,daten$First_wins)
##     
##      FALSE  TRUE
##   -1 23218  1120
##   0   1328     0
##   1   1853 22481
# Since this is very unlikely to happen randomly, we treat the equal-speed
# observations as a data error and exclude them.
daten$First_wins <- as.factor(daten$First_wins)
daten <- daten[daten$First_pokemon_faster != 0, ]

# Convert every remaining logical column into a factor for modelling.
# (seq_len instead of 1:ncol; is.logical instead of class() comparison.)
for (i in seq_len(ncol(daten))) {
  if (is.logical(daten[, i])) {
    daten[, i] <- as.factor(daten[, i])
  }
}

Select useful variables and create a temporary task for the ML project. Task: binary classification. Target to predict: does the first Pokemon win? (If not, the second Pokemon wins, of course.) TRUE/FALSE

# Keep only the engineered difference features, the legendary flags and
# the target column, then wrap them in an mlr3 classification task.
feature_cols <- c("attackVSattack_diff",
                  "defenseVSdefense_diff",
                  "sp_atkVSsp_atk_diff",
                  "sp_defVSsp_def_diff",
                  "speedVSspeed_diff",
                  "HPVSHP_diff",
                  "First_pokemon_legendary",
                  "Second_pokemon_legendary",
                  "First_wins")
Backend <- daten[, feature_cols]

task_combat <- TaskClassif$new(id = "task_combat", backend = Backend, target = "First_wins")

4. Feature Selection using the package “mlr3filter”

Based on information_gain: an entropy-based feature evaluation. For tree models, information gain decides which feature should be used to split the data.
# Score the features by entropy-based information gain.
# Renamed from `filter`: that name shadows/confuses stats::filter and
# dplyr::filter, and nothing downstream references the old binding.
info_gain_filter <- flt("information_gain")
info_gain_filter$calculate(task_combat)
# Colour-code the scores from transparent (low) to pink (high).
formattable(as.data.table(info_gain_filter),
            list(score = color_tile("transparent", "lightpink")))
feature score
speedVSspeed_diff 0.471350986
attackVSattack_diff 0.073598085
sp_atkVSsp_atk_diff 0.068886552
sp_defVSsp_def_diff 0.033694959
HPVSHP_diff 0.031579065
First_pokemon_legendary 0.015948659
Second_pokemon_legendary 0.015053922
defenseVSdefense_diff 0.009023457

5. Reset Task with the selected Features

# Rebuild the backend with only the top-ranked features; the defense
# difference and the two legendary flags scored lowest and are dropped.
selected <- c("speedVSspeed_diff", "attackVSattack_diff", "sp_atkVSsp_atk_diff",
              "sp_defVSsp_def_diff", "HPVSHP_diff", "First_wins")
Backend <- daten[, selected]

task_combat <- TaskClassif$new(id = "task_combat", backend = Backend, target = "First_wins")

6. Choose your favorite learners

# Instantiate the six candidate learners, all with probability output so we
# can inspect predicted win probabilities later. lrn() is exported by the
# attached mlr3 package, so the namespace prefix is redundant.
learner_glmnet  <- lrn("classif.glmnet",      predict_type = "prob")
learner_log     <- lrn("classif.log_reg",     predict_type = "prob")
learner_nb      <- lrn("classif.naive_bayes", predict_type = "prob")
learner_kknn    <- lrn("classif.kknn",        predict_type = "prob")
learner_RF      <- lrn("classif.ranger",      predict_type = "prob")
learner_xgboost <- lrn("classif.xgboost",     predict_type = "prob")

7.One Hot Encoding using pipeline

# Treatment (one-hot minus reference level) encoding for factor features.
fencoder <- po("encode", method = "treatment",
               affect_columns = selector_type("factor"))
fencoder$train(list(task_combat))
## $output
## <TaskClassif:task_combat> (48672 x 6)
## * Target: First_wins
## * Properties: twoclass
## * Features (5):
##   - int (5): HPVSHP_diff, attackVSattack_diff, sp_atkVSsp_atk_diff, sp_defVSsp_def_diff,
##     speedVSspeed_diff
# Wrap each learner into the graph "encode %>>% learner" and expose it as a
# single GraphLearner, so it trains and tunes like an ordinary learner.
with_encoder <- function(learner) GraphLearner$new(fencoder %>>% learner)

learner_glmnet  <- with_encoder(learner_glmnet)
learner_log     <- with_encoder(learner_log)
learner_nb      <- with_encoder(learner_nb)
learner_kknn    <- with_encoder(learner_kknn)
learner_RF      <- with_encoder(learner_RF)
learner_xgboost <- with_encoder(learner_xgboost)

8.Split train /test (saved as index) 80:20

# Reproducible 80/20 split: sample row ids for training, hold out the rest.
set.seed(1234)
n_rows    <- task_combat$nrow
train_set <- sample(n_rows, 0.8 * n_rows)
test_set  <- setdiff(seq_len(n_rows), train_set)

9.Training chosen learners

# Train every baseline learner on the same training rows.
# mlr3 learners are R6 objects (reference semantics), so training them in a
# loop mutates the very objects bound to the learner_* names, in the same
# order as the original one-call-per-line version.
set.seed(1234)
for (l in list(learner_glmnet, learner_log, learner_nb,
               learner_kknn, learner_RF, learner_xgboost)) {
  l$train(task_combat, row_ids = train_set)
}

10.Prediction

## [1] "glmnet prediction performances :"
##  classif.ce classif.acc 
##  0.90652286  0.09347714
## [1] "logreg prediction performances :"
##  classif.ce classif.acc 
##  0.09912686  0.90087314
## [1] "naive bayes prediction performances :"
##  classif.ce classif.acc 
##   0.1633282   0.8366718
## [1] "kknn prediction performances :"
##  classif.ce classif.acc 
##  0.07693888  0.92306112
## [1] "ragner(RF) prediction performances :"
##  classif.ce classif.acc 
##  0.05649718  0.94350282
## [1] "xgboost prediction performances :"
##  classif.ce classif.acc 
##  0.05721623  0.94278377

11.Tuning :

# Shared tuning setup (older mlr3tuning API):
# - stop each tuning run after 100 evaluations
terminator <- term("evals", n_evals = 100) 
# - exhaustive grid search over each parameter set
tuner <- tnr("grid_search")
# - inner resampling for tuning: 10-fold cross-validation
resample_inner <- rsmp("cv", folds = 10)
# - report both classification error and accuracy
measures <- msrs(c("classif.ce", "classif.acc"))

11.1 Tuning : setting glmnet hyperparameter and training the tuned learner

# Glmnet is a package that fits a generalized linear model via penalized maximum likelihood.Gaussian is the default family in glmnet and glmnet provides various options for users to customise the fit,the 3 option chosen here are : 
# alpha is for the elastic-net mixing parameter α, with range α∈[0,1]. α=1 is the lasso (default) and α=0 is the ridge.
# Value(s)of the penalty parameter lambda at which predictions are required. Default is the entire sequence used to create the model.
# eps is the  minimum value of lambda.min.ratio ; factory default= 1.0e-6

# glmnet search space (see commentary above):
#   alpha in [0.8, 0.9]   - elastic-net mixing (1 = lasso, 0 = ridge)
#   s     in [0.05, 0.07] - penalty lambda used for prediction
#   eps                   - minimum lambda.min.ratio
glmnet_space <- ParamSet$new(params = list(
  ParamDbl$new("classif.glmnet.alpha", lower = 0.8,   upper = 0.9),
  ParamDbl$new("classif.glmnet.s",     lower = 0.05,  upper = 0.07),
  ParamDbl$new("classif.glmnet.eps",   lower = 4e-10, upper = 5e-05)
))

# AutoTuner wraps the learner with the shared tuning setup.
learner_tunedglmnet <- AutoTuner$new(
  learner = learner_glmnet,
  resampling = resample_inner,
  measures = measures,
  tune_ps = glmnet_space,
  terminator = terminator,
  tuner = tuner)

# Time the tuning + training run.
set.seed(1234)
tic("start training")
learner_tunedglmnet$train(task_combat, row_ids = train_set)
toc()

11.2 Tuning : setting logReg hyperparameter and training the tuned learner

# Logistic regression search space (stats::glm control parameters):
#   maxit   - maximum IWLS iterations
#   epsilon - convergence tolerance
tune_ps <- ParamSet$new(
  params = list(ParamInt$new("classif.log_reg.maxit", lower = 10, upper = 50),
                ParamDbl$new("classif.log_reg.epsilon", lower = 0.000000001, upper = 0.000011)
  )
)

# Use named arguments for consistency with the other five AutoTuner
# definitions in this script; positional args are brittle if the
# constructor signature ever changes.
learner_tunedlog <- AutoTuner$new(
  learner = learner_log,
  resampling = resample_inner,
  measures = measures,
  tune_ps = tune_ps,
  terminator = terminator,
  tuner = tuner)

set.seed(1234)
tic("start training")
learner_tunedlog$train(task_combat, row_ids = train_set)
toc()

11.3 Tuning : setting naive bayes hyperparameter and training the tuned learner

# Threshold is the value by which zero probabilities or probabilities within the epsilon-range corresponding to metric variables are replaced 
# Laplace (additive) smoothing handles categorical variables .
# eps is the  value that specifies an epsilon-range to replace zero or close to zero probabilities by threshold. It applies to metric variables.

# Naive Bayes search space:
#   laplace   - additive smoothing for categorical features
#   threshold - replacement value for (near-)zero probabilities
#   eps       - width of the "near zero" band that triggers the replacement
nb_space <- ParamSet$new(params = list(
  ParamDbl$new("classif.naive_bayes.laplace",   lower = 0,       upper = 1),
  ParamDbl$new("classif.naive_bayes.threshold", lower = 0.04444, upper = 1),
  ParamDbl$new("classif.naive_bayes.eps",       lower = 1e-11,   upper = 1e-09)
))

learner_tunednb <- AutoTuner$new(
  learner = learner_nb,
  resampling = resample_inner,
  measures = measures,
  tune_ps = nb_space,
  terminator = terminator,
  tuner = tuner)

set.seed(1234)
tic("start training")
learner_tunednb$train(task_combat, row_ids = train_set)
toc()

11.4 Tuning : setting kknn hyperparameter and training the tuned learner

# for KKNN the main aspects are:
# - the distance measure which is used to define similarity (respectively dissimilarity) between the Pokemon battles. mlr3-kknn supports the Minowski-distance with different parameters. We will consider distance-parameter 1,2 and 3.
# - Minowksi-distance with parameter 1 is the Manhattan-distance
# - Minowksi-distance with parameter 2 is the Euclidean distance
# - Minowski-distance with parameter 3 is not so commonly used, but could also lead to better results in specific cases
## - the number of closest neighbors considered to predict an observation (k)
# - we have < 50000 observations in the data set and ~ 800 different Pokemon which could be used in a battle. 
# - this means the probability for two Pokemon to be selected as competitors in a battle is ~ (1/800) * (1/800) * 2 = 0.000003125
# - this means we expect 0.000003125 * 50000 = 0.15625 battles between each pair of Pokemons. Since Pokemon fights could be estimated best by looking at the same competitors results in the past, we expect a rather low k to be the best parameter. Since this assumption is based on intuition rather than on scientific proof, we still choose a rather high parameter value limit for k (200). 

# kknn search space:
#   k        - number of neighbours; per the reasoning above we expect a low
#              optimum, but allow up to 200 to be safe
#   distance - Minkowski parameter: 1 = Manhattan, 2 = Euclidean, 3 = rare
kknn_space <- ParamSet$new(params = list(
  ParamInt$new("classif.kknn.k",        lower = 1, upper = 200),
  ParamInt$new("classif.kknn.distance", lower = 1, upper = 3)
))

learner_tunedkknn <- AutoTuner$new(
  learner = learner_kknn,
  resampling = resample_inner,
  measures = measures,
  tune_ps = kknn_space,
  terminator = terminator,
  tuner = tuner)

set.seed(1234)
tic("start training")
learner_tunedkknn$train(task_combat, row_ids = train_set)
toc()

11.5 Tuning : setting ranger(RF) hyperparameter and training the tuned learner

# ranger (random forest) search space. Earlier experiments also explored
# importance, splitrule, num.random.splits and alpha, but those were left
# at their defaults; mtry is pinned to 1 because larger values increased
# the error in earlier runs.
rf_space <- ParamSet$new(list(
  ParamInt$new("classif.ranger.num.trees",     lower = 10, upper = 300),
  ParamInt$new("classif.ranger.min.node.size", lower = 1,  upper = 10),
  ParamInt$new("classif.ranger.mtry",          lower = 1,  upper = 1)
))

learner_tunedRF <- AutoTuner$new(
  learner = learner_RF,
  resampling = resample_inner,
  measures = measures,
  tune_ps = rf_space,
  terminator = terminator,
  tuner = tuner
)

set.seed(1234)
tic("start training")
learner_tunedRF$train(task_combat, row_ids = train_set)
toc()

11.6 Tuning : setting xgboost hyperparameter and training the tuned learner

# xgboost search space (package defaults in parentheses):
#   eta (0.3)            - learning rate / step-size shrinkage; larger values
#                          drop the training error faster but are less
#                          conservative
#   gamma (0)            - minimum loss reduction required to split a leaf;
#                          larger = more conservative; 0 worked best here,
#                          so it is pinned
#   max_depth (6)        - maximum tree depth; deeper trees = more complex
#                          model, higher overfit risk and memory use
#   min_child_weight (1) - minimum hessian sum per child; larger = more
#                          conservative; pinned at 0.01
#   nrounds              - number of boosting rounds; more rounds = lower
#                          training error but longer runtimes
xgb_space <- ParamSet$new(list(
  ParamDbl$new("classif.xgboost.eta",              lower = 0.3,  upper = 0.5),
  ParamDbl$new("classif.xgboost.gamma",            lower = 0,    upper = 0),
  ParamInt$new("classif.xgboost.max_depth",        lower = 15,   upper = 20),
  ParamDbl$new("classif.xgboost.min_child_weight", lower = 0.01, upper = 0.01),
  ParamInt$new("classif.xgboost.nrounds",          lower = 30,   upper = 100)
))

learner_tunedxgboost <- AutoTuner$new(
  learner = learner_xgboost,
  resampling = resample_inner,
  measures = measures,
  tune_ps = xgb_space,
  terminator = terminator,
  tuner = tuner
)

set.seed(1234)
tic("start training")
learner_tunedxgboost$train(task_combat, row_ids = train_set)
toc()

12.Prediction with the tuned learners

# Inspect each tuner's best configuration, then predict on the held-out rows.
#glmnet
learner_tunedglmnet$tuning_result
## $tune_x
## $tune_x$classif.glmnet.alpha
## [1] 0.8
## 
## $tune_x$classif.glmnet.s
## [1] 0.05
## 
## $tune_x$classif.glmnet.eps
## [1] 5.555911e-06
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.glmnet.alpha
## [1] 0.8
## 
## $params$classif.glmnet.s
## [1] 0.05
## 
## $params$classif.glmnet.eps
## [1] 5.555911e-06
## 
## 
## $perf
##  classif.ce classif.acc 
##  0.91337259  0.08662741
tunedglmnet_prediction <- learner_tunedglmnet$predict(task_combat, row_ids = test_set)

#log_reg
learner_tunedlog$tuning_result
## $tune_x
## $tune_x$classif.log_reg.maxit
## [1] 19
## 
## $tune_x$classif.log_reg.epsilon
## [1] 1.1e-05
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.log_reg.maxit
## [1] 19
## 
## $params$classif.log_reg.epsilon
## [1] 1.1e-05
## 
## 
## $perf
##  classif.ce classif.acc 
##   0.1008042   0.8991958
tunedlog_prediction <- learner_tunedlog$predict(task_combat, row_ids = test_set)

#naive bayes
learner_tunednb$tuning_result
## $tune_x
## $tune_x$classif.naive_bayes.laplace
## [1] 1
## 
## $tune_x$classif.naive_bayes.threshold
## [1] 0.36296
## 
## $tune_x$classif.naive_bayes.eps
## [1] 2.3e-10
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.naive_bayes.laplace
## [1] 1
## 
## $params$classif.naive_bayes.threshold
## [1] 0.36296
## 
## $params$classif.naive_bayes.eps
## [1] 2.3e-10
## 
## 
## $perf
##  classif.ce classif.acc 
##   0.1724583   0.8275417
tunednb_prediction <- learner_tunednb$predict(task_combat, row_ids = test_set)

#kknn
learner_tunedkknn$tuning_result
## $tune_x
## $tune_x$classif.kknn.k
## [1] 67
## 
## $tune_x$classif.kknn.distance
## [1] 1
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.kknn.k
## [1] 67
## 
## $params$classif.kknn.distance
## [1] 1
## 
## 
## $perf
##  classif.ce classif.acc 
##  0.06618383  0.93381617
tunedkknn_prediction <- learner_tunedkknn$predict(task_combat, row_ids = test_set)

#random forest (ranger)
learner_tunedRF$tuning_result
## $tune_x
## $tune_x$classif.ranger.num.trees
## [1] 203
## 
## $tune_x$classif.ranger.min.node.size
## [1] 1
## 
## $tune_x$classif.ranger.mtry
## [1] 1
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.ranger.num.trees
## [1] 203
## 
## $params$classif.ranger.min.node.size
## [1] 1
## 
## $params$classif.ranger.mtry
## [1] 1
## 
## 
## $perf
##  classif.ce classif.acc 
##  0.05449829  0.94550171
# BUG FIX: this line previously called learner_tunedxgboost$predict(), so the
# "tuned RF" test scores reported in section 13 were actually xgboost's
# predictions (note the byte-identical scores for tunedRF_prediction and
# tunedxgboost_prediction there). Predict with the tuned random forest.
tunedRF_prediction <- learner_tunedRF$predict(task_combat, row_ids = test_set)

# Best xgboost configuration, then prediction on the held-out rows.
#xgboost
learner_tunedxgboost$tuning_result
## $tune_x
## $tune_x$classif.xgboost.eta
## [1] 0.3222222
## 
## $tune_x$classif.xgboost.gamma
## [1] 0
## 
## $tune_x$classif.xgboost.max_depth
## [1] 19
## 
## $tune_x$classif.xgboost.min_child_weight
## [1] 0.01
## 
## $tune_x$classif.xgboost.nrounds
## [1] 30
## 
## 
## $params
## $params$encode.method
## [1] "treatment"
## 
## $params$encode.affect_columns
## selector_type("factor")
## 
## $params$classif.xgboost.nrounds
## [1] 30
## 
## $params$classif.xgboost.verbose
## [1] 0
## 
## $params$classif.xgboost.eta
## [1] 0.3222222
## 
## $params$classif.xgboost.gamma
## [1] 0
## 
## $params$classif.xgboost.max_depth
## [1] 19
## 
## $params$classif.xgboost.min_child_weight
## [1] 0.01
## 
## 
## $perf
##  classif.ce classif.acc 
##  0.05683553  0.94316447
tunedxgboost_prediction <- learner_tunedxgboost$predict(task_combat, row_ids = test_set)

13.Compare Prediction Before & After Tuning Models

# Compare held-out test performance before vs. after tuning for each learner.
# NOTE(review): the untuned *_prediction objects (glmnet_prediction, etc.)
# come from the section-10 prediction code, which is not visible in this
# file view -- confirm they exist in the session before running this block.
# tuned glmnet
glmnet_prediction$score(measures)
##  classif.ce classif.acc 
##  0.90652286  0.09347714
tunedglmnet_prediction$score(measures)
##  classif.ce classif.acc 
##  0.91319979  0.08680021
# tuned log_reg
log_prediction$score(measures)
##  classif.ce classif.acc 
##  0.09912686  0.90087314
tunedlog_prediction$score(measures)
##  classif.ce classif.acc 
##  0.09912686  0.90087314
# tuned naive bayes
nb_prediction$score(measures)
##  classif.ce classif.acc 
##   0.1633282   0.8366718
tunednb_prediction$score(measures)
##  classif.ce classif.acc 
##   0.1631228   0.8368772
# tuned KKNN
kknn_prediction$score(measures)
##  classif.ce classif.acc 
##  0.07693888  0.92306112
tunedkknn_prediction$score(measures)
##  classif.ce classif.acc 
##  0.06789933  0.93210067
# tuned ranger (RF)
RF_prediction$score(measures)
##  classif.ce classif.acc 
##  0.05649718  0.94350282
tunedRF_prediction$score(measures)
##  classif.ce classif.acc 
##  0.05536723  0.94463277
# tuned xgboost
xgboost_prediction$score(measures)
##  classif.ce classif.acc 
##  0.05721623  0.94278377
tunedxgboost_prediction$score(measures)
##  classif.ce classif.acc 
##  0.05536723  0.94463277

14.Benchmark

# For benchmarking, re-define each model with the hyperparameters chosen
# after tuning, so all six tuned configurations are compared under one
# common 3-fold cross-validation.
# NOTE(review): several values below (e.g. alpha = 0.85, s = 0.06, k = 45,
# min.node.size = 2, eta = 0.4777778) differ from the tuning results printed
# in section 12 -- confirm these deviations are intentional.
learner_glm_bench <- lrn("classif.glmnet", alpha = 0.85, s = 0.06,
                         eps = 0.0000000045, predict_type = "prob")
learner_log_bench <- lrn("classif.log_reg", maxit = 20,
                         epsilon = 0.00000001, predict_type = "prob")
learner_naive_bench <- lrn("classif.naive_bayes", laplace = 0.5,
                           threshold = 0.5, eps = 0.0000000001,
                           predict_type = "prob")
learner_kknn_bench <- lrn("classif.kknn", k = 45, distance = 1,
                          predict_type = "prob")
learner_ranger_bench <- lrn("classif.ranger", num.trees = 203,
                            min.node.size = 2, mtry = 1,
                            predict_type = "prob")
learner_xg_bench <- lrn("classif.xgboost", eta = 0.4777778, gamma = 0,
                        max_depth = 15, min_child_weight = 0.01,
                        nrounds = 30, predict_type = "prob")

learners <- list(learner_glm_bench, learner_log_bench,
                 learner_naive_bench, learner_kknn_bench,
                 learner_ranger_bench, learner_xg_bench)

design <- benchmark_grid(tasks = task_combat, learners = learners,
                         resamplings = rsmp("cv", folds = 3))

bmr <- benchmark(design)
## INFO  [12:32:56.572] Benchmark with 18 resampling iterations 
## INFO  [12:32:56.578] Applying learner 'classif.glmnet' on task 'task_combat' (iter 2/3) 
## INFO  [12:32:56.866] Applying learner 'classif.xgboost' on task 'task_combat' (iter 1/3) 
## INFO  [12:32:58.543] Applying learner 'classif.naive_bayes' on task 'task_combat' (iter 3/3) 
## INFO  [12:33:01.035] Applying learner 'classif.log_reg' on task 'task_combat' (iter 1/3) 
## INFO  [12:33:01.170] Applying learner 'classif.kknn' on task 'task_combat' (iter 1/3) 
## INFO  [12:33:37.869] Applying learner 'classif.naive_bayes' on task 'task_combat' (iter 2/3) 
## INFO  [12:33:40.359] Applying learner 'classif.glmnet' on task 'task_combat' (iter 3/3) 
## INFO  [12:33:40.645] Applying learner 'classif.ranger' on task 'task_combat' (iter 3/3) 
## INFO  [12:33:42.468] Applying learner 'classif.kknn' on task 'task_combat' (iter 2/3) 
## INFO  [12:34:19.371] Applying learner 'classif.kknn' on task 'task_combat' (iter 3/3) 
## INFO  [12:34:56.738] Applying learner 'classif.log_reg' on task 'task_combat' (iter 3/3) 
## INFO  [12:34:56.870] Applying learner 'classif.xgboost' on task 'task_combat' (iter 2/3) 
## INFO  [12:34:58.567] Applying learner 'classif.ranger' on task 'task_combat' (iter 2/3) 
## INFO  [12:35:00.349] Applying learner 'classif.ranger' on task 'task_combat' (iter 1/3) 
## INFO  [12:35:02.225] Applying learner 'classif.glmnet' on task 'task_combat' (iter 1/3) 
## INFO  [12:35:02.527] Applying learner 'classif.xgboost' on task 'task_combat' (iter 3/3) 
## INFO  [12:35:04.242] Applying learner 'classif.naive_bayes' on task 'task_combat' (iter 1/3) 
## INFO  [12:35:06.989] Applying learner 'classif.log_reg' on task 'task_combat' (iter 2/3) 
## INFO  [12:35:07.140] Finished benchmark

14.1 Benchmark Result

##    nr  resample_result     task_id          learner_id resampling_id iters classif.ce classif.acc
## 1:  1 <ResampleResult> task_combat      classif.glmnet            cv     3 0.91742686  0.08257314
## 2:  2 <ResampleResult> task_combat     classif.log_reg            cv     3 0.10200937  0.89799063
## 3:  3 <ResampleResult> task_combat classif.naive_bayes            cv     3 0.17122781  0.82877219
## 4:  4 <ResampleResult> task_combat        classif.kknn            cv     3 0.06683514  0.93316486
## 5:  5 <ResampleResult> task_combat      classif.ranger            cv     3 0.05489809  0.94510191
## 6:  6 <ResampleResult> task_combat     classif.xgboost            cv     3 0.05777449  0.94222551

15. Pokemon Duell

# Choose your two Pokemons and predict the winner using the Benchmark Best Learner
# Visulisation : The probability for first Pokemon to win via guage graph 
# Inputs are Pokemon id numbers from the pokemon dataset

# Example combatants: ids taken from the pokemon table.
pokemonA <- "123"
pokemonB <- "321"

# Ranger was the benchmark winner; train it on the full task for the duel.
learner_ranger_bench$train(task_combat)

# remember_prob keeps the probability from the previous battle so the gauge
# can display a delta; initialise it once before the first who_wins() call.
remember_prob <- NA_real_
# Predict the winner of a battle between two Pokemon (given by id) using the
# benchmark-best learner, and visualise the probability that the FIRST
# Pokemon wins as a plotly gauge. The gauge's delta compares against the
# previous call via the global `remember_prob`, which this function updates.
#
# pokemonA, pokemonB: Pokemon ids (character), matched against pokemon$id.
# Returns: a plotly figure. Side effect: updates `remember_prob` (<<-),
# which is deliberate state for the gauge delta.
who_wins <- function(pokemonA, pokemonB){
  # Look up a stat column for an id; match() does the id -> row translation
  # in one vectorized call (no per-element sapply needed).
  lookup <- function(ids, column) pokemon[[column]][match(ids, pokemon$id)]

  newdata <- data.frame(row.names = 1)
  newdata$First_pokemon  <- pokemonA
  newdata$Second_pokemon <- pokemonB

  # Build the same "first minus second" difference features the model
  # was trained on.
  newdata$attackVSattack_diff   <- lookup(pokemonA, "attack")     - lookup(pokemonB, "attack")
  newdata$defenseVSdefense_diff <- lookup(pokemonA, "defense")    - lookup(pokemonB, "defense")
  newdata$sp_atkVSsp_atk_diff   <- lookup(pokemonA, "sp_attack")  - lookup(pokemonB, "sp_attack")
  newdata$sp_defVSsp_def_diff   <- lookup(pokemonA, "sp_defense") - lookup(pokemonB, "sp_defense")
  newdata$speedVSspeed_diff     <- lookup(pokemonA, "speed")      - lookup(pokemonB, "speed")
  newdata$HPVSHP_diff           <- lookup(pokemonA, "hp")         - lookup(pokemonB, "hp")

  # Keep exactly the model's feature columns. FIX: the original assigned
  # this with `<<-`, leaking data_for_classification into the global
  # environment on every call; a plain local assignment suffices.
  newdata <- newdata %>%
    dplyr::select(speedVSspeed_diff,
                  attackVSattack_diff,
                  sp_atkVSsp_atk_diff,
                  sp_defVSsp_def_diff,
                  HPVSHP_diff)

  # Final model; ranger was the benchmark winner.
  prediction <- learner_ranger_bench$predict_newdata(newdata = newdata)
  # Select the probability column by class name rather than by position
  # (the original used prob[, 2], which silently depends on level order).
  prob <- prediction$prob[, "TRUE"]

  fig <- plot_ly(
    type = "indicator",
    mode = "gauge+number+delta",
    value = prob,
    title = list(text = "Probability for First Pokemon to win", font = list(size = 30)),
    delta = list(reference = remember_prob, increasing = list(color = "lightgreen"), decreasing = list(color = "red")),
    gauge = list(
      axis = list(range = list(0, 1), tickwidth = 1, tickcolor = "black"),
      bar = list(color = "seagreen"),
      # NOTE(review): "palegray" is not a standard CSS/R colour name --
      # confirm whether "lightgray" was intended.
      bgcolor = "palegray",
      borderwidth = 2,
      bordercolor = "gray",
      steps = list(
        list(range = c(0.0, 0.2), color = "white"),
        list(range = c(0.2, 0.4), color = "white"),
        list(range = c(0.4, 0.6), color = "white"),
        list(range = c(0.6, 0.8), color = "white"),
        list(range = c(0.8, 1), color = "white"))))
  fig <- fig %>%
    layout(
      margin = list(l = 5, r = 10),
      paper_bgcolor = "white",
      font = list(color = "gray", family = "Arial", size = 20))

  # Remember this probability so the next call can display a delta.
  remember_prob <<- as.numeric(prob[1])
  fig
}

15.1 Now you can play Pokemon Battle with Machine Learning!

Pokemon Battle: choose two Pokemon (ids from 1 to 800) and check the first Pokemon's probability of winning.

# Each call predicts one battle and renders the win-probability gauge for
# the first Pokemon; from the second call onward the gauge also shows the
# change versus the previous battle (via remember_prob).
#Play1
who_wins(pokemonA = "284", pokemonB = "324")
#Play2
who_wins(pokemonA = "12", pokemonB = "45")
#Play3
who_wins(pokemonA = "1", pokemonB = "29")